# coding:utf-8

import scrapy
from scrapy.selector import Selector
from ..items import NewsItem
from scrapy.spiders import Spider
from ..spiders import utils_crawler

# Publisher name written into the 'source' field of every item built by
# SouthcnSpider.parse_details.
source = u'南方网'


class SouthcnSpider(Spider):
    """Spider for the opinion channel of southcn.com.

    ``parse`` handles each listing page in ``start_urls`` and schedules one
    request per linked article; ``parse_details`` turns an article page into
    a ``NewsItem``.
    """

    name = "southcn"
    # Site root. Not read by this class; kept in case external code
    # references it.
    base_domains = 'http://opinion.southcn.com'
    start_urls = [
        'http://opinion.southcn.com/o/node_78585.htm',
        'http://opinion.southcn.com/o/node_78586.htm',
        'http://opinion.southcn.com/o/node_82377.htm',
        'http://opinion.southcn.com/o/node_96464.htm',
        'http://opinion.southcn.com/o/node_82376.htm',
    ]

    def parse(self, response):
        """Yield one article request per entry on a listing page.

        The third breadcrumb link text is forwarded to the article callback
        as ``meta['classify']``.

        :param response: response for one of the listing pages.
        :returns: generator of ``scrapy.Request`` objects whose callback is
            ``parse_details``.
        """
        # extract_first() rather than extract()[0]: a missing breadcrumb
        # must not abort the whole listing page with an IndexError.
        classify = response.xpath(
            '//div[@class="m-crm g-wp"]/a[3]/text()'
        ).extract_first(default=u'')

        links = response.xpath(
            '//div[@class="m-lists"]//div/@data-link'
        ).extract()
        for link in links:
            link = link.strip()
            if not link:
                continue
            # scrapy.Request rejects URLs without a scheme, so resolve
            # relative links against the listing page first. Absolute URLs
            # pass through urljoin unchanged.
            yield scrapy.Request(
                url=response.urljoin(link),
                meta={'classify': classify},
                callback=self.parse_details,
            )

    def parse_details(self, response):
        """Build a ``NewsItem`` from an article page.

        :param response: response for a single article page.
        :returns: the populated ``NewsItem``, or ``None`` when the page has
            no title or publication time (such pages are logged and
            skipped; they yielded no item before either).
        """
        title = response.xpath(
            '//h2[@id="article_title"]/text()'
        ).extract_first()
        pub_time = response.xpath(
            '//span[@id="pubtime_baidu"]/text()'
        ).extract_first()
        if title is None or pub_time is None:
            # Previously an unhandled IndexError; the outcome (no item) is
            # the same but the cause is now stated explicitly.
            self.logger.warning(
                'Skipping %s: article title or publication time not found',
                response.url,
            )
            return None

        paragraphs = response.xpath(
            '//div[@class="content"]//p/text()'
        ).extract()
        image_srcs = response.xpath(
            '//div[@class="content"]//img/@src'
        ).extract()

        item = NewsItem()
        item['title'] = title.strip()
        item['href'] = response.url
        item['time'] = pub_time
        item['content'] = utils_crawler.deal_content(paragraphs)
        item['source'] = source
        # Resolve relative image paths against the article URL so every
        # entry is an absolute URL.
        item['image_urls'] = [response.urljoin(src) for src in image_srcs]

        return item
